/* 

****************************************************************************

Summary:

Code for creating panel analysis data for public housing residents that lived in
or near demolished buildings.

****************************************************************************

*/


* Reset the session: empty memory, no paging of output, close any open log
clear
set more off
cap log close

* Directory globals. Each value is prefixed directly onto a file name further
* down in this file, so a non-empty path must end with a directory separator.
global nexpbase          ""
 // directory for storing file with demographic information for IDHS data
global demo              ""
 // directory for storing demolition related files
global temp              ""
 // directory for temporary files created for this project
global xwalk_assist      ""
 // directory for cross-walk file which contains IDs for the social assistance files
global labor             ""
 // directory for IDES data
global match_ui          ""
 // directory for cross-walk file for IDES and Chapin files
global geo               ""
 // directory for geocoded address assistance data file
global dofiles           ""
 // directory for replication do-files

* Step switches: set a switch to 1 to run the corresponding block below
global step1=0     // create demographic information 
global step2=0     // create household composition variables
global step3=0     // create pre-demo crime
global step4=0     // create address history panel (for relocation analysis)
global step5=0     // create monthly assistance file
global step6=0     // create yearly panel -- this is the analysis file

* User-written commands used in this file: geodist, mmerge, gzuse/gzsave and
* the egen function nvals(). Installs are captured so that an offline session,
* or one where the packages already exist, is not interrupted.
* NOTE(review): mmerge, gzsave and egenmore are presumably the SSC package
* names that provide these commands -- confirm before relying on this.
cap net install geodist, from(http://fmwww.bc.edu/RePEc/bocode/g/)
foreach pkg in mmerge gzsave egenmore {
    cap ssc install `pkg'
}

********************************************************************************
*** STEP 1. 
*** Prepare demographic information
*** Output: temp files with demographic information that will be used to create a panel
********************************************************************************

if $step1==1 {

* Step 1 builds one demographic record per 2012 Chapin ID (male and race
* dummies plus the 2006 ID) and writes it to the temp directory as
* temp_file3_bdates_chdhsid_201212.dta.gz

* Prepare demographic information

/* THERE IS NO CHDHSID_200606 IN THE RACE FILE BUT THE OBS ARE SORTED IN THE SAME ORDER AS IN  
THE BIRTHDAY AND SEX FILE, SO WE HAVE TO FIRST MERGE RACE ONTO THAT FILE.
The row number (id) generated in each file is used as the merge key. */
gzuse "${nexpbase}file3_hh_newrace.dta.gz", clear // this file is based on the original IDHS admin data
gen id=_n
save "${temp}temp_race.dta", replace

/* Fixed DOB, sex variables */
gzuse "${nexpbase}file3_hh_demog2.dta.gz", clear // this file is based on the original IDHS admin data
gen id=_n
mmerge id using "${temp}temp_race.dta", type(1:1)
erase "${temp}temp_race.dta"
count if race!=race_new & race!=0
/* EQUALS 0 - THIS MEANS THAT THE ONLY DISCREPANCIES BETWEEN THE OLD AND NEW RACE  
ARE THAT WE FILL IN WHAT WERE MISSINGS IN THE OLD RACE */
drop race id
rename race_new race
des
save "${temp}temp_demog.dta", replace

/* Cook county IDHS assistance sample */
gzuse "${nexpbase}file3_households_final.dta.gz", clear // this file is based on the original IDHS admin data

* keep one record per non-empty 2006 ID, then replace the demographic
* variables with the corrected versions saved above
duplicates report chdhsid_200606
duplicates drop chdhsid_200606, force
drop if chdhsid_200606==""
drop birthdate race sex
mmerge chdhsid_200606 using "${temp}temp_demog.dta", type(1:1)
erase "${temp}temp_demog.dta"
drop _merge

/* Generate dummy variables */
* SEX CODING RULE: 1=MALE, 2=FEMALE, 0=MISSING
* male is left missing when sex is coded 0
gen byte male=(sex==1) if sex!=0
drop sex

* RACE CODING RULE: 1=WHITE, 2=BLACK, 6=HISPANIC
* The dummies are first generated only for codes other than 0,4,5,8,9 ...
gen byte black=(race==2) if !inlist(race,0,4,5,8,9)
gen byte hispanic=(race==6) if !inlist(race,0,4,5,8,9)
gen byte white=(race==1) if !inlist(race,0,4,5,8,9)
gen byte raceother=(race==3 | race==7) if !inlist(race,0,4,5,8,9)
* ... and then set to 0 (not to missing) for codes 0,4,5,8,9.
* ZERO ACTUALLY IS MISSING, THE OTHERS WE DON'T KNOW WHAT THEY ARE
foreach x of varlist black-raceother {
   replace `x'=0 if inlist(race,0,4,5,8,9)
}
drop race

** Merge on Chapin CHDHSID 201212 ID -- the 2012 ID # is the one used in the assistance files
* NOTE(review): keep(1 3) retains records with no cross-walk match; these carry
* a missing 2012 ID and would form a single group in the selection below, of
* which only one record is kept -- confirm this is intended.
merge 1:n chdhsid_200606 using "${xwalk_assist}crosswalk_2006_12.dta", keep(1 3) keepusing(chdhsid_201212)
 // this file comes from Chapin Hall to link different versions of their ID variables
drop _merge

duplicates report chdhsid_201212

* randomly select one record to keep for each 2012 ID (need this file to be unique)
* Each record gets a uniform draw and the record with the smallest draw in its
* 2012 ID group is kept.
* NOTE(review): the first sort has ties within a 2012 ID, so which record
* receives which draw may depend on the tie order -- confirm reproducibility.
set seed 1234
sort chdhsid_201212
by chdhsid_201212: gen temp = runiform()
sort chdhsid_201212 temp
by chdhsid_201212: gen temp2 = 1 if _n==1
keep if temp2==1
drop temp*

duplicates report chdhsid_201212

compress

gzsave "${temp}temp_file3_bdates_chdhsid_201212.dta.gz", replace

}


********************************************************************************
*** STEP 2. 
*** Create household composition measures (e.g. mother indicator, etc.) (6/21/2014)
********************************************************************************

if $step2==1 {

* Step 2 builds household composition measures (family size, counts of adults
* and kids, mother and father flags, teen-mother flag) from the Step 1 file and
* saves them by 2012 Chapin ID as temp_file3_hh_vars_v2.dta.gz

gzuse  "${temp}temp_file3_bdates_chdhsid_201212.dta.gz", clear
 // file created in Step 1 above

assert targetid~=""

* make the file unique at the chdhsid_200606 level (note: not unique at this lvl b/c merge with 201212 ID # generates extras)
keep targetid chdhsid_200606 male birthdate
duplicates drop
 /* must make the file unique at the chdhsid_200606 to properly count up # of persons per household */

rename targetid household_id

* make family size (number of rows in the household)
bysort household_id: egen fam_size = total(1==1)

* make # of adults; age is measured as of 1 Jan 1997
gen bdate=date(birthdate,"YMD")
format bdate %td

gen age = (td(1 Jan 1997)-bdate)/365.25
gen adult = (age>18 & age~=.)

bysort household_id: egen total_adults = total(adult)

* make # of kids
gen kid = (age<=18 & age~=.)
bysort household_id: egen total_kids = total(kid)
assert total_kids~=.

gen hh_has_kids = (total_kids>0)

* girl_kid and boy_kid are missing when male does not equal 0 or 1 respectively;
* total() ignores those missing values
gen girl_kid = (age<=18 & age~=.) if male==0
bysort household_id: egen total_girls = total(girl_kid)
assert total_girls~=.

gen boy_kid = (age<=18 & age~=.) if male==1
bysort household_id: egen total_boys = total(boy_kid)
assert total_boys~=.

* flag mothers: the only adult female in a household with kids
gen temp = 0
 replace temp = 1 if adult==1 & male==0
 
bysort household_id: egen total_females = total(temp)
drop temp

gen mother = 0 
 replace mother = 1 if (adult==1 & male==0) & (total_females==1 & total_kids>0) 
 
* randomly select some female in the household to be a mother
* (draws are missing for everyone else, so they sort last within the household)
set seed 1234 
gen temp = runiform() if adult==1 & male==0
sort household_id temp
by household_id: gen temp2 = (_n==1)
replace mother = 1 if (adult==1 & male==0) & (total_females>1 & total_kids>0) & temp2==1
drop temp temp2
 
* check
bysort household_id: egen total_mothers = total(mother)
assert total_mothers<=1
drop total_females

* household has teen mother flag: mother is less than 18 years older than the
* oldest kid. A missing age difference compares as false, so the flag is 0
* when the household has no mother or no kids.
gen temp = age if age<=18
bysort household_id: egen hh_max_kid_age = max(temp)
gen temp2 = age if mother==1
bysort household_id: egen hh_mom_age = max(temp2)

gen hh_teen_mom = ((hh_mom_age-hh_max_kid_age<18))
assert hh_teen_mom == 0 if total_mothers==0
assert hh_teen_mom ==0 if total_kids==0

drop temp temp2

* eldest kid flag (only set in households with more than one kid)
gen oldest_kid = 0
 replace oldest_kid = 1 if total_kids>1 & hh_max_kid_age==age

* flag fathers: the only adult male in a household with kids
gen temp = 0
 replace temp = 1 if adult==1 & male==1

bysort household_id: egen total_males = total(temp)
* temp must be dropped here: it is generated again for the random draw below
drop temp

gen father = 0 
 replace father = 1 if (adult==1 & male==1) & (total_males==1 & total_kids>0) 

* randomly select some male in the household to be a father
set seed 1234 
gen temp = runiform() if adult==1 & male==1
sort household_id temp
by household_id: gen temp2 = (_n==1)
replace father = 1 if (adult==1 & male==1) & (total_males>1 & total_kids>0) & temp2==1
drop temp temp2
 
* check
bysort household_id: egen total_fathers = total(father)
assert total_fathers<=1
drop total_males

* households with kids but neither a mother nor a father flagged
gen flag = (total_fathers==0 & total_mothers==0 & total_kids>0)
lab var flag "flag for problem in household construction"

* assert that there is one household ID per person ID
bysort chdhsid_200606 household_id: gen n = 1 if _n==1
bysort chdhsid_200606: egen temp = total(n)
assert temp==1
drop temp

* retrieve chdhsid_2012 from file created in step 1
* (the file is unzipped for the merge and zipped again afterwards)
!gunzip "${temp}temp_file3_bdates_chdhsid_201212.dta.gz"
merge 1:n chdhsid_200606 using "${temp}temp_file3_bdates_chdhsid_201212.dta", keepusing(chdhsid_201212)
!gzip "${temp}temp_file3_bdates_chdhsid_201212.dta"

assert _merge==3
drop _merge

keep chdhsid_201212 household_id total_adults total_kids total_boys total_girls total_mothers total_fathers fam_size mother father kid flag hh_teen_mom hh_mom_age oldest_kid

* insure unique at CHDHSID_201212 ID lvl
bysort chdhsid_201212: egen temp = total(1==1)
assert temp==1

drop temp

* household-level variables get the hh_ prefix
rename total_adults hh_total_adults
rename total_kids hh_total_kids
rename total_girls hh_total_girls
rename total_boys hh_total_boys
rename fam_size hh_fam_size
rename total_mothers hh_has_mother
rename total_fathers hh_has_father
rename kid hh_kid
rename flag hh_flag

compress
gzsave "${temp}temp_file3_hh_vars_v2.dta.gz", replace

}


********************************************************************************
*** STEP 3. 
*** Create baseline (prior to the demolition) arrest measures
********************************************************************************

if $step3==1 {

* Step 3 counts arrests recorded up to the year before each person's
* demolition year and saves the counts by chdhsid_200606.

* files referenced more than once in this step
local bdates  "${temp}temp_file3_bdates_chdhsid_201212.dta"
local preyear "${temp}temp_chdhsid_pre-demoyr.dta"

* residents list (treatments and controls)
use "${demo}list_chdhsid_demo_residents.dta", clear
 // file created by "make_social_assistance_PH_sample_final.do"

* attach demographics and the 2006 ID from the Step 1 file
!gunzip "`bdates'.gz"
merge n:1 chdhsid_201212 using "`bdates'", keep(1 3) keepusing(chdhsid_200606 birthdate male black hispanic white raceother)
!gzip "`bdates'"
drop _merge
 /* 100% match rate */

duplicates report chdhsid_200606

* attach the IDES ID from the Chapin Hall cross-walk
merge n:1 chdhsid_200606 using "${match_ui}crosswalk_chdhsid_200606_IDES.dta", keep(1 3) keepusing(IDES_final)

* checking: parse the birth date, then discard the parsed variables
gen bdate=date(birthdate,"YMD")
format bdate %td
gen bdate_year=year(bdate)
drop bdate* _merge

* treatment year is the first four-digit run found in proj_grp
gen treat_yr = regexs(0) if(regexm(proj_grp, "[0-9][0-9][0-9][0-9]"))
assert treat_yr!=""
destring treat_yr, replace

* calendar year before the demolition event
gen last_year = treat_yr-1
 lab var last_year "last year before demolition, person-specific"

* one row per 2006 ID, keeping the smallest last_year
keep chdhsid_200606 last_year
bysort chdhsid_200606 (last_year): keep if _n==1

lab data "List of IDs and last year before demo"
save "`preyear'", replace

* weekly arrest records (unbalanced), created from raw ISP arrest data
gzuse "${temp}weekly_crime_data_unbalanced.dta.gz", clear

* restrict to the sample IDs saved above
rename studyid chdhsid_200606
merge n:1 chdhsid_200606 using "`preyear'", keep(3)

* keep arrests dated before the demolition year
keep if year<=last_year

collapse (sum) xxviolent=violent xxproperty=property xxdrugs=drugs xxother=other, by(chdhsid_200606)

save "${temp}temp_pre-demo_crime.dta", replace
 /* file stores pre-demo arrest data by CHDHSID 200606.
    Note that distinct people (CHDHSID_201212) may
    map to the same 200606 record and in this case
    will have the same pre-demo history
 */

* clean-up
erase "`preyear'"

}

********************************************************************************
*** STEP 4. 
*** Clean panel of address histories and merge on neighborhood measures
********************************************************************************

if $step4==1 {

* Step 4 builds a person-by-year address panel with geocodes and 1990 Census
* tract measures and saves it as temp_addr_census_panel.dta.gz
* Variable names are written out in full so the block does not depend on
* variable-name abbreviation being enabled.

** Step 4.1

* open address file which has the geocoded info:
gzuse "${geo}xwalk_addresses_final_spell_exp.dta.gz", clear
 // this file is a geocoded version of IDHS address data
keep address city state zip match_* geocoder county2000 tract2000 block2000 latitude longitude

drop if address==""
drop if geocoder==""
drop if zip==""

* drop if missing county/tract information (this is what I need from this file)
drop if county2000==.
drop if tract2000==""

* force the file to be unique on the merge key used below
duplicates drop address city state zip, force
 
* save this geocode information -- has tract numbers for the addresses
compress
gzsave "${temp}temp_geo_address.dta.gz", replace

** Step 4.2

* open the panel history of addresses created elsewhere
gzuse "${temp}PH_sample_month_address_nonexp_EC_v3.dta.gz", clear
 // file was created by "make_social_assistance_PH_sample_final.do"
 // this is the monthly address history for each person with light cleaning
 // note that this file is not balanced. everyone in the data is on a case, but I have already set addresses to missing if they are on more than one case
 
count if address==""
gen has_addr=(address~="")

* Merge the panel with the geocoded addresses
!gunzip "${temp}temp_geo_address.dta.gz"
merge n:1 address state city zip using "${temp}temp_geo_address.dta", keep(1 3)
 // file is created above
!gzip "${temp}temp_geo_address.dta"

gen not_geocode = (_merge==1 & has_addr==1)
 lab var not_geocode "Has addr, not found in geocode"

rename _merge merge_geo

* Merge on neighborhood characteristics;
* county and tract are string merge keys, blanked unless the address is in
* Illinois county 031
gen county=string(county2000,"%03.0f")
 replace county="" if county=="."
 replace county="" if state!="IL" | (state=="IL" & county!="031") 
gen tract=tract2000
 replace tract="" if county==""
 
* check tract (should be a 6 digit number)
gen temp=length(tract)
assert temp==0 | temp==6
drop temp

* Merge 1990 Census (you must have the 1990 Census data)
merge n:1 county tract using "${demo}census_1990_cook_county.dta", keep(1 3)
 // you must have the 1990 Census data to obtain measures of neighborhood (tract) characteristics
rename _merge merge_census

* remove all the missing addresses 
drop if address==""
 /* this mostly occurs because people are on multiple cases in a given month
    the other reason this could occur is because the "pull-date" for the address
    takes place after the spell begins
 */
 
* remove the valid addresses that couldn't be geocoded
drop if has_addr==1 & merge_geo==1

gen moved=(county2000~=31 & county2000~=.)
 lab var moved "Moved from Cook County"
 
* drop non-moves (moved==0) where there is no Census info
drop if merge_census==1 & moved==0

* now keep the first address in a calendar year (this is assured to be non-missing by the above)
* NOTE(review): rows are not ordered within a person-year here, so the row kept
* is not necessarily the earliest month -- confirm whether an ordering variable
* should be added to the bysort.
bysort chdhsid_201212 year: gen n = 1 if _n==1
keep if n==1

* check (with one row left per person-year this always passes)
bysort chdhsid_201212 year: egen nvals = nvals(address)
 assert nvals==1
drop nvals

sort chdhsid_201212 year
keep chdhsid_201212 year address state city zip county2000 tract2000 latitude longitude tpop p*

* save this file (will merge this to the analysis data)
gzsave "${temp}temp_addr_census_panel.dta.gz", replace

}




********************************************************************************
*** STEP 5. 
*** Create a monthly panel of assistance receipt
********************************************************************************


if $step5==1 {

* Step 5 reduces the person-case-month spell file to one row per person and
* month, taking the maximum of each listed variable across rows.
local spells  "${temp}spell_person_case_month_panel_nonexp_EC_v2.dta.gz"
local monthly "${temp}temp_assist_monthly.dta.gz"

* spell file is created in "make_social_assistance_PH_sample_final.do"
gzuse "`spells'", clear

collapse (max) assistance foodstamp medicaid grant grantee, by(chdhsid_201212 year month)

* every person-month must have a food stamp value
assert foodstamp!=.

compress
gzsave "`monthly'", replace

}


********************************************************************************
*** STEP 6. 
*** Create analysis data
********************************************************************************

if $step6==1 {

/*

   Prepare sample, get cross-sectional information on the public housing sample
   based on the time of their demolition event
   
*/

* open list of public-housing-demolition residents -- these are both treatments and controls
use  "${demo}list_chdhsid_demo_residents.dta", clear
 // sample of residents created by "make_social_assistance_PH_sample_final.do"

* obtain demographics + chdhsid_200606 (these are demographic files based on IDHS data)
* (the Step 1 file is unzipped for the merge and zipped again afterwards)
!gunzip "${temp}temp_file3_bdates_chdhsid_201212.dta.gz"
merge n:1 chdhsid_201212 using "${temp}temp_file3_bdates_chdhsid_201212.dta", keep(1 3) keepusing(chdhsid_200606 birthdate male black hispanic white raceother)
!gzip "${temp}temp_file3_bdates_chdhsid_201212.dta"
drop _merge // 100% match rate

* Get IDES ID (to merge on UI data) (this is a crosswalk data file that links IDES data to Chapin ID)
* no_IDES flags people without a match in the cross-walk
merge n:1 chdhsid_200606 using "${match_ui}crosswalk_chdhsid_200606_IDES.dta", keep(1 3) keepusing(IDES_final)
gen no_IDES = (_merge==1)
drop _merge
   
* birthdate formatting
* note: a missing birth year compares as greater than 1990, so records whose
* birthdate cannot be parsed are dropped here as well
gen bdate=date(birthdate,"YMD")
format bdate %td
gen bdate_year=year(bdate)
drop if bdate_year>1990

* "treatment" year for each person (both demolition and non-demo residents)
* taken as the first four-digit run found in proj_grp
gen treat_yr = regexs(0) if(regexm(proj_grp, "[0-9][0-9][0-9][0-9]"))
assert treat_yr~=""
destring treat_yr, replace

* Create age at time of the demolition (there are different demolition years)
* computed from calendar years only (treatment year minus birth year)
gen age_demo = treat_yr-bdate_year
assert age_demo~=.

* Categorize age-groups; based on age at time of demolition
* note: age_5_10 and age_5_13 apply only an upper bound; no lower bound of 5
* is imposed in these lines
gen age_5_10 = (age_demo<11) 
gen age_5_13 = (age_demo<14)
gen age_14_18 = (age_demo>=14 & age_demo<=18)
gen adult = (age_demo>18)

* check
assert age_5_13==1 if age_5_10==1

* rename
rename age_demo xxage
lab var xxage "Age (integer) at time of demolition"
 
*** Pre-demolition arrest counts (file created in Step 3)
********************************************************************************
merge n:1 chdhsid_200606 using "${temp}temp_pre-demo_crime.dta", keep(1 3)

local arrest_types xxviolent xxproperty xxdrugs xxother

* people not found in the arrest file get zero counts
foreach v of local arrest_types {
    replace `v' = 0 if _merge==1
}
drop _merge

gen xxany_arrest = xxviolent + xxproperty + xxdrugs + xxother

* all counts are set to zero for people younger than 14 at demolition
foreach v in xxany_arrest `arrest_types' {
    replace `v' = 0 if xxage<14
}

*** Get demolition address tract-level characteristics (including lat/long coordinates)
********************************************************************************

** manual cleaning to get the baseline tract to merge correctly
* regexm lines replace any address containing the pattern; the == lines
* replace exact strings only
replace address="6215 SOUTH WABASH" if regexm(address, "6215 SOUTH WABASH")
replace address="220 EAST 63RD" if address=="220 EAST 63 DRIVE STREET"
replace address="2245 WEST LAKE STREET" if address=="2245 WEST LAKE SUITE"
replace address="4101 SOUTH FEDERAL" if address=="4101 SOUTH FEDERAL APARTMTN"
replace address="2417 WEST ADAMS STREET" if address=="2417 WEST ADAMSST"
replace address="2517 WEST ADAMS STREET" if address=="2517 WEST ADAMS STR"
replace address="3653 SOUTH FEDERAL" if address=="3653 SOUTH FEDERALD"
replace address="3750 SOUTH ELLIS" if regexm(address, "3750 SOUTH ELLIS")
replace address="3837 SOUTH ELLIS" if regexm(address,"3837 SOUTH ELLIS")
replace address="3855 SOUTH ELLIS" if regexm(address,"3855 SOUTH ELLIS")
replace address="4022 SOUTH STATE STREET" if address=="4022 SOUTH SSTATE"
replace address="4022 SOUTH STATE STREET" if address=="4022 SOUTH STATE AP"
replace address="4946 SOUTH STATE" if address=="4946 SOUTH 0 STATE"
replace address="510 EAST 36TH PLACE" if address=="510 EAST 36 T 5 H PLACE"
replace address="111 NORTH WOOD" if regexm(address, "111 NORTH WOOD")
replace address="1815 WEST MONROE" if regexm(address, "1815 WEST MONROE")
replace address="3616 SOUTH STATE" if regexm(address, "3616 SOUTH STATE")
replace address="120 NORTH HERMITAGE" if regexm(address, "120 NORTH HERMITAGE")
* NOTE(review): the next line matches street number 540 but writes 514, unlike
* every other line in this list -- confirm whether this is a deliberate
* correction or a typo.
replace address="514 EAST 36TH STREET" if regexm(address, "540 EAST 36TH STR")
replace address="3549 SOUTH FEDERAL" if regexm(address,  "3549 SOUTH FEDERAL")

* zip overrides for three State Street addresses
replace zip = "60609" if regexm(address, "4444 SOUTH STATE")
replace zip = "60609" if regexm(address, "4946 SOUTH STATE")
replace zip = "60609" if regexm(address, "4950 SOUTH STATE")

* Get geocoded info (file made in step above)
* unmatched addresses are kept and have missing geocode variables
!gunzip "${temp}temp_geo_address.dta.gz"
merge n:1 address state city zip using "${temp}temp_geo_address.dta", keep(1 3) keepusing(county2000 tract2000 latitude longitude)
!gzip "${temp}temp_geo_address.dta"

drop _merge

* Merge on neighborhood characteristics;
* county and tract are string merge keys, blanked unless the baseline address
* is in Illinois county 031. Source variables are named in full so these lines
* do not depend on variable-name abbreviation.
gen county=string(county2000,"%03.0f")
 replace county="" if county=="."
 replace county="" if state!="IL" | (state=="IL" & county!="031") 
gen tract=tract2000
 replace tract="" if county==""
 
* check tract (should be a 6 digit number)
gen temp=length(tract)
assert temp==0 | temp==6
drop temp

* Merge 1990 Census characteristics for baseline address (you must have the 1990 Census data files)
merge n:1 county tract using "${demo}census_1990_cook_county.dta", keep(1 3) keepusing(tpop p*)
assert _merge==3 if county~=""
drop _merge county tract

* Rename variables with "xx" to denote baseline status
rename address xxaddress
rename city xxcity
rename state xxstate
rename zip xxzip
rename county2000 xxcounty2000
rename tract2000 xxtract2000
rename latitude xxlatitude
rename longitude xxlongitude

* tpop plus every variable stored between pblack and powner
foreach var of varlist tpop pblack-powner {
 rename `var' xx`var'
 }
 
* NOTE(review): the Census variable pemployed is relabelled as an unemployment
* measure here -- confirm its definition in the Census extract.
rename xxpemployed xxpunemployed
 
* Merge PHCDN Data (obtained from PHCDN survey)
* the merge key tract is filled only when the baseline county code is 31
gen tract = xxtract2000 if xxcounty2000==31
gen length = length(tract)
assert length==6 if tract~=""
 
merge n:1 tract using "${demo}phcdn_xsec.dta", keep(1 3) keepusing(nbh_ebcoleff nbh_ebcapita nbh_ebpolice)
  /* notes: these measures are time invariant */
  /* for more information on this survey, see Sampson, Raudenbush and Earls 1997 */
  assert _merge==3 if tract~=""

drop tract _merge length

* Baseline measures of neighborhood quality
rename nbh_ebcoleff xxebcoleff
rename nbh_ebcapita xxebcapita 
rename nbh_ebpolice xxebpolice

* Merge baseline neighborhood crime data (data from Chicago police)
* step 1: map the baseline tract to a police beat
gen tract = xxtract2000 if xxcounty2000==31
gen length = length(tract)
assert length==6 if tract~=""
drop length

merge n:1 tract using "${demo}xwalk_beat_tract.dta", keep(1 3) keepusing(beat_num)
 // cross-walk file between Census tracts and police beats
drop tract _merge

* step 2: attach the beat crime rates for 1995 as the baseline values
gen year = 1995

!gunzip "${temp}beat_crime_panel.dta.gz"
merge n:1 beat_num year using "${temp}beat_crime_panel.dta", keep(1 3) keepusing(violent_rate property_rate)
 // Chicago police department data on crime at the beat level
!gzip "${temp}beat_crime_panel.dta"

* every row with a beat number must find 1995 crime rates
assert _merge==3 if beat_num~=.
drop year _merge

rename violent_rate xxnbh_violent
rename property_rate xxnbh_property

rename beat_num xxbeat_num

*** Expand into (weekly) panel
********************************************************************************
 
* Create weekly panel 52*20=1040
* (every existing row is replicated into 1040 copies)
expand 1040

* year counts up from 1990 in blocks of 52 rows within person and project group;
* week is the row position within each year
bysort chdhsid_201212 proj_grp: gen year = 1990+int((_n-1)/52)
bysort chdhsid_201212 proj_grp year: gen week = _n 

* Create a stata-week variable
gen week2 = yw(year, week)
 format week2 %tw

* Create a stata date variable
gen date = dofw(week2)
 format date %tdDD_Mon_CCYY
 
* Create a month variable
gen month = month(date)

* Create a quarter variable
gen qtr = quarter(date)
 assert qtr>=1 & qtr<=4

* create time-varying age measure
* (year difference plus the month difference expressed in years)
gen bmonth=month(bdate)
gen byear=year(bdate)
gen age = year-byear+((month-bmonth)/12)

* create integer age within year to use with the panel specifications
* (floor of the largest age reached within the calendar year)
bysort chdhsid_201212 year: egen age_yr = max(age)
replace age_yr = floor(age_yr)

* the 2006 ID is named studyid in the arrest file merged further below
rename chdhsid_200606 studyid
count if studyid=="" // none

* confirm data is unique at this level
duplicates tag chdhsid_201212 proj_grp year week, gen(dup)
assert dup==0
drop dup

*** Get (weekly) crime data (file created from ISP criminal records for the social assistance case file individuals)
********************************************************************************
!gunzip "${temp}weekly_crime_data_unbalanced.dta.gz"
merge n:1 studyid year week using "${temp}weekly_crime_data_unbalanced.dta", keep(1 3) keepusing(violent property drugs other chargedate)
!gzip "${temp}weekly_crime_data_unbalanced.dta"

* Fill-in crime measures (assuming the above is a complete history)
* person-weeks without an arrest record are set to zero
foreach var of varlist violent property other drugs { 
 replace `var' = 0 if _merge==1
 } 
drop _merge

*** Get panel employment info (UI data is quarterly)	
********************************************************************************
rename IDES_final IDES
destring IDES, replace
!gunzip "${labor}ui_1995_2009_clean_panel.dta.gz"
merge n:1 IDES year qtr using "${labor}ui_1995_2009_clean_panel.dta", keep(1 3) keepusing(wages njobs)
!gzip "${labor}ui_1995_2009_clean_panel.dta"
 
* assume this is a complete record
* quarters without a UI record are set to zero wages and jobs for people who
* have an IDES ID; people without one keep missing values
* NOTE(review): the year<2011 condition presumably always holds for the
* 1990-2009 panel built above -- confirm.
replace wages = 0 if _merge==1 & year<2011 & no_IDES==0
replace njobs = 0 if _merge==1 & year<2011 & no_IDES==0
gen employed =(wages>0 & _merge==3) if year<2011 & no_IDES==0

* n flags one row per person-quarter so that quarters are counted once;
* temp_qtrs_yr is compared with qtrs_yr later in this step
bysort chdhsid_201212 year qtr: gen n = 1 if _n==1
bysort chdhsid_201212 year: egen temp_qtrs_yr = total(employed*n)
drop n

assert wages~=. if no_IDES==0
assert njobs~=. if no_IDES==0
assert employed~=. if no_IDES==0

assert employed>0 if wages>0 & wages~=.
assert wages>0 if employed>0

drop _merge

*** Get panel assistance data (assistance data is monthly)	
********************************************************************************
!gunzip "${temp}temp_assist_monthly.dta.gz"
merge n:1 chdhsid_201212 year month using "${temp}temp_assist_monthly.dta", keep(1 3) keepusing(assistance foodstamp grant medicaid grantee)
!gzip "${temp}temp_assist_monthly.dta" 
 // this file is created above
  
* assume this is a complete record
* person-months not found in the assistance file are set to zero
foreach var of varlist assistance foodstamp grant medicaid grantee {
 replace `var' = 0 if _merge==1 
 }

drop _merge
 
*** Get panel address information
********************************************************************************

* The merge key year is filled only for calendar years after treat_yr-3, so
* earlier rows carry a missing key and cannot pick up an address.
rename year temp_year
gen year = temp_year if temp_year>treat_yr-3

* Merge on the person-by-year addresses which also have the Census tract info for each address 
!gunzip "${temp}temp_addr_census_panel.dta.gz" // this file is created in a step above
merge n:1 chdhsid_201212 year using "${temp}temp_addr_census_panel.dta", keep(1 3) keepusing(zip state city address county2000 tract2000 latitude longitude tpop p*)
!gzip "${temp}temp_addr_census_panel.dta"

* NOTE(review): the Census variable pemployed is relabelled as an unemployment
* measure here -- confirm its definition in the Census extract.
rename pemployed punemployed

* check: rows outside the address window must not have matched. The test is on
* the missing merge key; a condition on year itself could never be true here
* because year is missing for exactly those rows.
assert _merge==1 if missing(year)

drop year
rename temp_year year
 
gen has_addr_panel = (_merge==3)
 lab var has_addr_panel "=1 if _merge==3 with address panel, post-demo years"

* check
assert address~="" if has_addr_panel==1
 
drop _merge
 
* Merge PHDCN Neighborhood survey (this is at the tract level for Chicago only)
* NOTE(review): tract in the rename below is presumably an abbreviation of
* tract2000 (the variable is restored under that name further down) -- confirm.
rename tract temp
gen tract = temp if county2000==31
* tract should be 6 digits
gen length = length(tract)
 assert length==6 | length==0

* cross-sectional version of PHDCD data file (for more information on this survey, see Sampson, Raudenbush and Earls 1997)
merge n:1 tract using "${demo}phcdn_xsec.dta", keep(1 3) keepusing(nbh_ebcoleff nbh_ebcapita nbh_ebpolice)

* checks
assert county2000==31 if _merge==3
drop _merge tract length
rename temp tract2000

* Merge panel neighborhood crime data (need to merge tracts to beats first)
* tract2000 is parked under the name temp while tract serves as the merge key
rename tract2000 temp
gen tract = temp if county2000==31
gen length = length(tract)
assert length==6 | length==0 // ==0 if missing tract info
drop length

merge n:1 tract using "${demo}xwalk_beat_tract.dta", keep(1 3) keepusing(beat_num)
 // cross-walk file that links tracts to Chicago police beats

drop tract _merge

* beat crime rates are merged by beat and panel year
!gunzip "${temp}beat_crime_panel.dta.gz"
merge n:1 beat_num year using "${temp}beat_crime_panel.dta", keep(1 3) keepusing(violent_rate property_rate)
 // Chicago police department data on crime at the beat level
!gzip "${temp}beat_crime_panel.dta"

* every row with a beat number must match from 1995 onwards
assert _merge==3 if beat_num~=. & year>=1995
drop _merge

rename violent_rate nbh_violent
rename property_rate nbh_property

rename temp tract2000

* Get PH-building info (this merges by the raw address info (address/state/city/zip) for PH-buildings only)

* rename building
rename bdg xxbdg
 /* this is the baseline building for the person */

!gunzip "${demo}xwalk_case_addr_PH_buildings.dta.gz"
merge n:1 address state city zip using "${demo}xwalk_case_addr_PH_buildings.dta", keep(1 3) keepusing(prjname bdgtype bunits bdg)
!gzip "${demo}xwalk_case_addr_PH_buildings.dta"
 // this file is created by "make_addr_PH_buildings_xwalk_final.do"

* checks
assert county2000==31 if _merge==3 // only Cook county projects should merge on
* note: a missing bunits also satisfies this comparison, so the check binds
* only on rows where bunits is filled
assert bunits>=40

* Define PH indicator for people with address history only
gen ph = .
 replace ph=1 if _merge==3 & bunits~=. // define PH=1 if address found a match
 replace ph=0 if address~="" & _merge==1 // define PH=0 (private housing) if has address that did NOT merge
 
* Building-size indicators. bunits is missing for addresses that did not match
* a PH building, and a missing value compares as larger than any number, so
* the high-rise test excludes missing explicitly. Without that, every
* private-housing row (ph==0) would be coded as high-rise.
gen ph_high_rise = (bunits>=70 & !missing(bunits)) if ph==1 | ph==0
gen ph_med_rise = (bunits<70 & bunits>=40) if ph==1 | ph==0

* checks
assert _merge==1 if year<treat_yr-3
assert ph==1 if _merge==3 & treat==1
 
drop _merge

*** Create final variables
********************************************************************************
   
* flag all post-demo obs
* (the demolition year itself is neither pre nor post)
gen post = (year>treat_yr)
gen pre = (year<treat_yr)
 
* Calculate criminal activity in the post-demolition period
* person-level totals of weekly arrests at age 14 or older in post years
foreach var of varlist violent property drugs other {
 gen temp1 = `var' if age>=14 & post==1
 bysort chdhsid_201212: egen `var'_pd = total(temp1)
 drop temp1
}

gen arrests_all_pd = violent_pd+property_pd+drugs_pd+other_pd

* Calculate criminal activity in the first three years after demolition
* temp2 marks years treat_yr+1 through treat_yr+3
gen temp1 = treat_yr+3
gen temp2 = 1 if year>treat_yr & year<=temp1

foreach var of varlist violent property drugs other {
 gen temp3 = `var' if age>=14 & temp2==1
 bysort chdhsid_201212: egen `var'_3pd = total(temp3)
 drop temp3
 assert `var'_3pd<=`var'_pd
}

drop temp1 temp2
 
* the by-prefixed commands below rely on this sort order
sort chdhsid_201212 year week

* Yearly crime
by chdhsid_201212 year: egen violent_yr = total(violent)
by chdhsid_201212 year: egen property_yr = total(property)
by chdhsid_201212 year: egen drugs_yr = total(drugs)
by chdhsid_201212 year: egen other_yr = total(other)
 /* note that file is weekly, crime was measured in weeks in using (merged-on) file */

* Yearly labor
* wages and employed quarters are summed over one flagged row per quarter
sort chdhsid_201212 year qtr
by chdhsid_201212 year qtr: gen n = 1 if _n==1 // flag each quarter uniquely (UI data is quarterly)
by chdhsid_201212 year: egen wages_yr = total(wages*n)
by chdhsid_201212 year: egen employed_yr = max(employed)
by chdhsid_201212 year: egen qtrs_yr = total(employed*n)
 drop n
 
* cross-check against the count computed right after the UI merge
assert qtrs_yr==temp_qtrs_yr
drop temp_qtrs_yr
 
* totals are zero rather than missing when all inputs are missing, so people
* without an IDES ID are reset to missing
replace wages_yr =. if no_IDES==1
replace employed_yr=. if no_IDES==1
replace qtrs_yr =. if no_IDES==1

* labor supply in the year prior to demolition 
* first-quarter value in the year before demolition for people older than 15
* at demolition; when the treatment year is 1995 the 1995 value is used instead
* NOTE(review): chdhsid_2012 below is presumably an abbreviation of
* chdhsid_201212 -- confirm.
gen temp = employed if qtr==1 & year==treat_yr-1 & xxage>15 & treat_yr~=1995
 replace temp = employed if qtr==1 & year==1995 & xxage>15 & treat_yr==1995  
bysort chdhsid_2012: egen xxemployed = max(temp) 
drop temp

gen temp = wages if qtr==1 & year==treat_yr-1 & xxage>15 & treat_yr~=1995
 replace temp = wages if qtr==1 & year==1995 & xxage>15 & treat_yr==1995  
bysort chdhsid_2012: egen xxwages = max(temp) 
drop temp

* Yearly assistance
* NOTE(review): these by-prefixed lines need the data to still be sorted by
* person and year after the bysort commands above -- confirm.
by chdhsid_201212 year: egen assistance_yr = max(assistance)
by chdhsid_201212 year: egen foodstamp_yr = max(foodstamp)
by chdhsid_201212 year: egen medicaid_yr = max(medicaid)
by chdhsid_201212 year: egen grant_yr = max(grant)
by chdhsid_201212 year: egen grantee_yr = max(grantee)

* Crime over given ages in the post demolition period
foreach var of varlist violent property drugs other {
 gen temp1 = `var' if age>=14 & age<=18 & year>treat_yr
 bysort chdhsid_201212: egen `var'_14_18 = total(temp1)
 
 gen temp2 = `var' if age>18 & age<=22 & year>treat_yr
 bysort chdhsid_201212: egen `var'_19_22 = total(temp2)
 
 drop temp1 temp2
}

* check
assert violent_14_18==0 if adult==1
assert property_14_18==0 if adult==1
assert drugs_14_18==0 if adult==1
assert other_14_18==0 if adult==1

* reset all adults and older kids to missing for the 14-18 measure
* (the 19-22 measure is reset in the same way)
foreach var of varlist violent property drugs other {
 replace `var'_14_18=. if age_5_13==0
 replace `var'_19_22=. if age_5_13==0
 lab var `var'_14_18 "`var' arrests at age 14-18 (inclusive), for kids 5-13 at demo"
 lab var `var'_19_22 "`var' arrests at age 19-22 (inclusive), for kids 5-13 at demo"
}

* Set the variable lists to keep.
* The labor outcomes live in labor_vars, NOT in a global called labor: the
* global labor is defined at the top of this file as the IDES data directory,
* and reusing the name here would overwrite that path for the rest of the session.
* The address list names latitude in full instead of the abbreviation lati, so
* the keep does not depend on variable-name abbreviation.
global person      "chdhsid_201212 treat treat_yr birthdate age_* xx* adult adate proj_grp male no_IDES"
global time        "year post pre"
global crime       "violent_yr property_yr drugs_yr other_yr"
global labor_vars  "wages_yr employed_yr qtrs_yr"
global assist      "assistance_yr foodstamp_yr medicaid_yr grant_yr grantee_yr"
global addr        "state city address zip latitude longitude has_addr_panel"
global nbhd        "tpop pblack psingmom pcollege punemployed ppubass pbpov bunits bdg bdgtype ph ph_high_rise ph_med_rise nbh_* "
global others      "*_pd *_3pd *_14_18 *_19_22"

* Flatten into a yearly panel: once only person- and year-level variables are
* kept, the repeated within-year rows are exact duplicates and can be dropped.
keep ${person} ${time} ${crime} ${labor_vars} ${assist} ${addr} ${nbhd} ${others}
duplicates drop

* check: the file should now be unique at the person x project group x year level
bysort chdhsid_201212 proj_grp year: egen test = total(1==1)
assert test==1
drop test

* Tag one row per person
egen tag = tag(chdhsid_201212)

* Distance from baseline location (project):
* miles between the baseline coordinates (xxlatitude, xxlongitude) and the
* coordinates of the address on each row.
geodist xxlatitude xxlongitude latitude longitude, gen(distance) miles
count if distance==.

* distance is forced to missing whenever any one of the four coordinates is missing
foreach coord in xxlatitude xxlongitude latitude longitude {
	replace distance=. if `coord'==.
}

* Post-demolition summaries at horizons of 3, 5, 6, 7 and 8 years.
* For each horizon i:
*   - distance and neighborhood characteristics are averaged over the years
*     after treat_yr up to and including treat_yr + i;
*   - the address / public housing (PH) indicators are taken from the single
*     year treat_yr + i.
local window_vars "distance tpop pblack psingmom pcollege punemployed ppubass pbpov nbh_violent nbh_property nbh_ebcoleff nbh_ebcapita"
local point_vars  "has_addr_panel ph ph_high_rise ph_med_rise"

foreach i of numlist 3 5/8 {

	tempvar horizon_yr in_window
	gen `horizon_yr' = treat_yr+`i'
	gen `in_window' = 1 if year>treat_yr & year<=`horizon_yr'

	* person-level mean over the post-demolition window
	foreach var of varlist `window_vars' {
		tempvar masked
		gen `masked' = `var' if `in_window'==1
		bysort chdhsid_201212: egen `var'_`i'pd = mean(`masked')
		drop `masked'
	}

	* value in the horizon year itself, spread to all rows of the person
	foreach var of varlist `point_vars' {
		tempvar masked
		gen `masked' = `var' if year==`horizon_yr'
		bysort chdhsid_201212: egen `var'_`i'pd = max(`masked')
		drop `masked'
	}

	lab var has_addr_panel_`i'pd "=1 if has addr `i' years after demo. missing if no addr"
	lab var ph_`i'pd "=1 if has PH addr `i' years after demo. missing if no addr"

	drop `horizon_yr' `in_window'
}

* Number of post-demolition years (post==1) spent at a PH address
tempvar ph_post
gen `ph_post' = ph if post==1
bysort chdhsid_201212: egen ph_yrs_pd = total(`ph_post')
drop `ph_post'
lab var ph_yrs_pd "# of PH addr years in post demo. only counts valid addr."

* Number of post-demolition years with a non-missing address
tempvar addr_post
gen `addr_post' = has_addr_panel if post==1
bysort chdhsid_201212: egen has_addr_yrs_pd = total(`addr_post')
drop `addr_post'
lab var has_addr_yrs_pd "# of non-missing addr in post demo period"

* the period flags are not part of the final file
drop post pre

lab data "Yearly panel of arrest data. Analysis file."

** more labels
lab var nbh_violent "Beat lvl, yearly violent crime rate"
lab var nbh_property "Beat lvl, yearly property crime rate"

* 6/22/2014 -- add household composition variables
* The household file (created in a step above) is stored gzipped: unzip it,
* merge it on by person id, and zip it again. The merge runs under capture so
* the file is re-zipped even when the merge fails; the merge error is then
* re-raised. Unmatched master rows are kept (keep(1 3)).
!gunzip "${temp}temp_file3_hh_vars_v2.dta.gz"
capture noisily merge m:1 chdhsid_201212 using "${temp}temp_file3_hh_vars_v2.dta", keep(1 3) nogenerate
local merge_rc = _rc
!gzip "${temp}temp_file3_hh_vars_v2.dta"
if `merge_rc' exit `merge_rc'

* Adults living with kids / girls / boys.
* Stata treats missing as larger than any number, so hh_total_*>0 alone would
* be true for rows without household information (e.g. rows left unmatched by
* the merge); the explicit !missing() keeps those rows at 0.
gen adult_w_kid = (hh_total_kids>0 & !missing(hh_total_kids) & adult==1)

gen adult_w_girlkid = (hh_total_girls>0 & !missing(hh_total_girls) & adult==1)
gen adult_w_boy_kid = (hh_total_boys>0 & !missing(hh_total_boys) & adult==1)

* hhh (presumably head of household -- confirm): the father when the household
* has one, otherwise the mother
gen hhh = 0
 replace hhh=1 if mother==1 & hh_has_father==0
 replace hhh=1 if father==1 & hh_has_father==1

* baseline measures of adult crime in household:
* household-level mean of xxany_arrest over the rows of the mother / father
foreach var of varlist mother father {
	gen temp = xxany_arrest if `var'==1
	bysort household_id: egen `var'_xxany_arrest = mean(temp)
	drop temp
	}

* correct for no mother or no father
replace mother_xxany_arrest = 0 if hh_has_mother==0
replace father_xxany_arrest = 0 if hh_has_father==0

* manual fix: where the parent's arrest measure is still missing, treat the
* household as having no such parent and set the measure to 0
replace hh_has_mother=0 if missing(mother_xxany_arrest)
replace mother_xxany_arrest = 0 if missing(mother_xxany_arrest)

replace hh_has_father=0 if missing(father_xxany_arrest)
replace father_xxany_arrest = 0 if missing(father_xxany_arrest)

assert hh_has_mother~=.
assert hh_has_father~=.

gen hh_no_mother = (hh_has_mother==0)
gen hh_no_father = (hh_has_father==0)

compress

* write the final analysis file (gzipped)
gzsave "${demo}analysis_file_final.dta.gz", replace

}








  


  
  








  

	
